package com.yaoandw.givebook.site;

import org.apache.log4j.Logger;

import com.yaoandw.crawler.UrlTool;
import com.yaoandw.storage.SimpleCrawlUrl;
import com.yaoandw.util.StringUtil;

/**
 * Crawl rules for the site rooted at {@link #SITE_URL}.
 *
 * <p>Supplies the site root plus the two link predicates that the
 * {@code AbstractSite} base class asks for: which links count as targets and
 * which links should be followed further.
 */
public class BxwxorgImpl extends AbstractSite {
	private static final Logger logger = Logger.getLogger(BxwxorgImpl.class);

	/** Root URL of the site; also the prefix a link must have to be followed. */
	public static final String SITE_URL = "http://www.bxwx.org";
//	public static final String SITE_URL = "http://www.bxwx.org/binfo/8/8823.htm";

	/**
	 * Creates the site handler for the given crawl URL.
	 *
	 * @param crawlUrl passed straight through to the {@code AbstractSite} constructor
	 */
	public BxwxorgImpl(SimpleCrawlUrl crawlUrl) {
		super(crawlUrl);
	}

	/**
	 * @return the root URL of this site, {@link #SITE_URL}
	 */
	@Override
	protected String getSiteUrl() {
		return SITE_URL;
	}

	/**
	 * Tells whether a link is a target of the crawl.
	 *
	 * @param link the link to test
	 * @return {@code true} when the link ends with {@code ".txt"}
	 */
	@Override
	protected boolean ifTheLinkIsTarget(String link) {
		// Presumably the .txt links are the downloadable book files -- confirm against the site.
		final String targetSuffix = ".txt";
		return link.endsWith(targetSuffix);
	}

	/**
	 * Tells whether the crawler should keep following a link.
	 *
	 * <p>A link is followed only when it starts with the site URL, does not
	 * contain exactly six {@code '/'} characters, and does not contain
	 * {@code "reader.php"}.
	 *
	 * <p>NOTE(review): the site check is a plain string-prefix match, so a URL
	 * on another host whose name merely begins with the site URL would also
	 * pass -- confirm whether that is acceptable.
	 *
	 * @param link the link to test
	 * @return {@code true} when the link should be crawled further
	 */
	@Override
	protected boolean ifTheLinkContinueCrawl(String link) {
		// Links that do not begin with the site URL are never followed.
		if (!link.startsWith(getSiteUrl())) {
			return false;
		}
		// Per the original author's preliminary observation, links on this site
		// with exactly six '/' are online-reading pages and need not be crawled.
		if (StringUtil.countChar('/', link) == 6) {
			return false;
		}
		// reader.php links are skipped as well.
		return !link.contains("reader.php");
	}
}
